{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "fdb909fb-bb05-46b7-bb2a-e41e4be90099",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-09T06:16:49.249720Z",
     "iopub.status.busy": "2024-09-09T06:16:49.249382Z",
     "iopub.status.idle": "2024-09-09T06:16:49.253708Z",
     "shell.execute_reply": "2024-09-09T06:16:49.253250Z",
     "shell.execute_reply.started": "2024-09-09T06:16:49.249700Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/mnt/workspace/LLaMA-Factory\n"
     ]
    }
   ],
   "source": [
    "%cd LLaMA-Factory/"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "02ce81f5-7ff3-42bb-b618-4fdcc636dba6",
   "metadata": {},
   "source": [
    "# 1. 准备数据集"
   ]
  },
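  {
   "cell_type": "markdown",
   "id": "added-dataset-note",
   "metadata": {},
   "source": [
    "The run below fine-tunes on LLaMA-Factory's bundled `identity.json` dataset (91 samples, visible in the training log). The next cell is a minimal sketch, assuming the dataset keeps its usual `{{name}}` / `{{author}}` placeholders: patch them so the model learns the custom identity (猿神 / 司展宇) that appears in the training examples."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "added-dataset-patch",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Minimal sketch (not part of the original run): patch the {{name}} / {{author}}\n",
    "# placeholders in LLaMA-Factory's bundled data/identity.json so the model learns\n",
    "# a custom identity. Assumes the dataset keeps its usual placeholder format.\n",
    "import json\n",
    "\n",
    "with open(\"data/identity.json\", \"r\", encoding=\"utf-8\") as f:\n",
    "    samples = json.load(f)\n",
    "\n",
    "for sample in samples:\n",
    "    sample[\"output\"] = (\n",
    "        sample[\"output\"].replace(\"{{name}}\", \"猿神\").replace(\"{{author}}\", \"司展宇\")\n",
    "    )\n",
    "\n",
    "with open(\"data/identity.json\", \"w\", encoding=\"utf-8\") as f:\n",
    "    json.dump(samples, f, ensure_ascii=False, indent=2)\n",
    "\n",
    "print(f\"Patched {len(samples)} samples.\")"
   ]
  },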
  {
   "cell_type": "markdown",
   "id": "462d2f03-c52b-4d91-bb4c-c4dea0af183d",
   "metadata": {},
   "source": [
    "# 2.开始微调训练"
   ]
  },
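  {
   "cell_type": "markdown",
   "id": "added-config-note",
   "metadata": {},
   "source": [
    "The training cell below launches supervised LoRA fine-tuning from a YAML config. The next cell prints `examples/train_lora/qwen2_lora_sft.yaml` first, so you can check which model path, dataset, LoRA settings, and hyperparameters will be used (the values echoed in the training log, such as bf16, a peak learning rate around 1e-4, 6 epochs, and per-device batch size 1 with gradient accumulation 8, come from this file)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "added-config-print",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inspect the config consumed by the training command below.\n",
    "print(open(\"examples/train_lora/qwen2_lora_sft.yaml\", encoding=\"utf-8\").read())"
   ]
  },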
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "9fbb143f-d54b-484a-9cdd-6dc9107a8816",
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2024-09-09T06:22:52.515999Z",
     "iopub.status.busy": "2024-09-09T06:22:52.515676Z",
     "iopub.status.idle": "2024-09-09T06:24:48.154532Z",
     "shell.execute_reply": "2024-09-09T06:24:48.154000Z",
     "shell.execute_reply.started": "2024-09-09T06:22:52.515970Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.10/site-packages/_distutils_hack/__init__.py:55: UserWarning: Reliance on distutils from stdlib is deprecated. Users must rely on setuptools to provide the distutils module. Avoid importing distutils or import setuptools first, and avoid setting SETUPTOOLS_USE_DISTUTILS=stdlib. Register concerns at https://github.com/pypa/setuptools/issues/new?template=distutils-deprecation.yml\n",
      "  warnings.warn(\n",
      "[2024-09-09 14:22:57,705] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
      "\u001b[93m [WARNING] \u001b[0m Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH\n",
      "\u001b[93m [WARNING] \u001b[0m sparse_attn requires a torch version >= 1.5 and < 2.0 but detected 2.3\n",
      "\u001b[93m [WARNING] \u001b[0m using untested triton version (2.3.1), only 1.0.0 is known to be compatible\n",
      "09/09/2024 14:23:00 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16\n",
      "[INFO|configuration_utils.py:731] 2024-09-09 14:23:00,981 >> loading configuration file /mnt/workspace/qwen2-1.5B/config.json\n",
      "[INFO|configuration_utils.py:800] 2024-09-09 14:23:00,981 >> Model config Qwen2Config {\n",
      "  \"_name_or_path\": \"/mnt/workspace/qwen2-1.5B\",\n",
      "  \"architectures\": [\n",
      "    \"Qwen2ForCausalLM\"\n",
      "  ],\n",
      "  \"attention_dropout\": 0.0,\n",
      "  \"bos_token_id\": 151643,\n",
      "  \"eos_token_id\": 151645,\n",
      "  \"hidden_act\": \"silu\",\n",
      "  \"hidden_size\": 1536,\n",
      "  \"initializer_range\": 0.02,\n",
      "  \"intermediate_size\": 8960,\n",
      "  \"max_position_embeddings\": 32768,\n",
      "  \"max_window_layers\": 28,\n",
      "  \"model_type\": \"qwen2\",\n",
      "  \"num_attention_heads\": 12,\n",
      "  \"num_hidden_layers\": 28,\n",
      "  \"num_key_value_heads\": 2,\n",
      "  \"rms_norm_eps\": 1e-06,\n",
      "  \"rope_theta\": 1000000.0,\n",
      "  \"sliding_window\": 32768,\n",
      "  \"tie_word_embeddings\": true,\n",
      "  \"torch_dtype\": \"bfloat16\",\n",
      "  \"transformers_version\": \"4.42.4\",\n",
      "  \"use_cache\": true,\n",
      "  \"use_sliding_window\": false,\n",
      "  \"vocab_size\": 151936\n",
      "}\n",
      "\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:23:00,982 >> loading file vocab.json\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:23:00,982 >> loading file merges.txt\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:23:00,982 >> loading file tokenizer.json\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:23:00,982 >> loading file added_tokens.json\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:23:00,982 >> loading file special_tokens_map.json\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:23:00,982 >> loading file tokenizer_config.json\n",
      "[WARNING|logging.py:313] 2024-09-09 14:23:01,159 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
      "[INFO|configuration_utils.py:731] 2024-09-09 14:23:01,159 >> loading configuration file /mnt/workspace/qwen2-1.5B/config.json\n",
      "[INFO|configuration_utils.py:800] 2024-09-09 14:23:01,160 >> Model config Qwen2Config {\n",
      "  \"_name_or_path\": \"/mnt/workspace/qwen2-1.5B\",\n",
      "  \"architectures\": [\n",
      "    \"Qwen2ForCausalLM\"\n",
      "  ],\n",
      "  \"attention_dropout\": 0.0,\n",
      "  \"bos_token_id\": 151643,\n",
      "  \"eos_token_id\": 151645,\n",
      "  \"hidden_act\": \"silu\",\n",
      "  \"hidden_size\": 1536,\n",
      "  \"initializer_range\": 0.02,\n",
      "  \"intermediate_size\": 8960,\n",
      "  \"max_position_embeddings\": 32768,\n",
      "  \"max_window_layers\": 28,\n",
      "  \"model_type\": \"qwen2\",\n",
      "  \"num_attention_heads\": 12,\n",
      "  \"num_hidden_layers\": 28,\n",
      "  \"num_key_value_heads\": 2,\n",
      "  \"rms_norm_eps\": 1e-06,\n",
      "  \"rope_theta\": 1000000.0,\n",
      "  \"sliding_window\": 32768,\n",
      "  \"tie_word_embeddings\": true,\n",
      "  \"torch_dtype\": \"bfloat16\",\n",
      "  \"transformers_version\": \"4.42.4\",\n",
      "  \"use_cache\": true,\n",
      "  \"use_sliding_window\": false,\n",
      "  \"vocab_size\": 151936\n",
      "}\n",
      "\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:23:01,160 >> loading file vocab.json\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:23:01,160 >> loading file merges.txt\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:23:01,160 >> loading file tokenizer.json\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:23:01,161 >> loading file added_tokens.json\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:23:01,161 >> loading file special_tokens_map.json\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:23:01,161 >> loading file tokenizer_config.json\n",
      "[WARNING|logging.py:313] 2024-09-09 14:23:01,324 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
      "09/09/2024 14:23:01 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
      "09/09/2024 14:23:01 - INFO - llamafactory.data.loader - Loading dataset identity.json...\n",
      "Converting format of dataset (num_proc=16): 100%|█| 91/91 [00:00<00:00, 441.39 e\n",
      "Running tokenizer on dataset (num_proc=16): 100%|█| 91/91 [00:01<00:00, 52.90 ex\n",
      "training example:\n",
      "input_ids:\n",
      "[151644, 8948, 198, 2610, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 6023, 151645, 198, 151644, 77091, 198, 9707, 0, 358, 1079, 10236, 234, 123, 99315, 11, 458, 15235, 17847, 7881, 553, 26853, 116, 76313, 100732, 13, 2585, 646, 358, 7789, 498, 3351, 30, 151645]\n",
      "inputs:\n",
      "<|im_start|>system\n",
      "You are a helpful assistant.<|im_end|>\n",
      "<|im_start|>user\n",
      "hi<|im_end|>\n",
      "<|im_start|>assistant\n",
      "Hello! I am 猿神, an AI assistant developed by 司展宇. How can I assist you today?<|im_end|>\n",
      "label_ids:\n",
      "[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 9707, 0, 358, 1079, 10236, 234, 123, 99315, 11, 458, 15235, 17847, 7881, 553, 26853, 116, 76313, 100732, 13, 2585, 646, 358, 7789, 498, 3351, 30, 151645]\n",
      "labels:\n",
      "Hello! I am 猿神, an AI assistant developed by 司展宇. How can I assist you today?<|im_end|>\n",
      "[INFO|configuration_utils.py:731] 2024-09-09 14:23:05,208 >> loading configuration file /mnt/workspace/qwen2-1.5B/config.json\n",
      "[INFO|configuration_utils.py:800] 2024-09-09 14:23:05,209 >> Model config Qwen2Config {\n",
      "  \"_name_or_path\": \"/mnt/workspace/qwen2-1.5B\",\n",
      "  \"architectures\": [\n",
      "    \"Qwen2ForCausalLM\"\n",
      "  ],\n",
      "  \"attention_dropout\": 0.0,\n",
      "  \"bos_token_id\": 151643,\n",
      "  \"eos_token_id\": 151645,\n",
      "  \"hidden_act\": \"silu\",\n",
      "  \"hidden_size\": 1536,\n",
      "  \"initializer_range\": 0.02,\n",
      "  \"intermediate_size\": 8960,\n",
      "  \"max_position_embeddings\": 32768,\n",
      "  \"max_window_layers\": 28,\n",
      "  \"model_type\": \"qwen2\",\n",
      "  \"num_attention_heads\": 12,\n",
      "  \"num_hidden_layers\": 28,\n",
      "  \"num_key_value_heads\": 2,\n",
      "  \"rms_norm_eps\": 1e-06,\n",
      "  \"rope_theta\": 1000000.0,\n",
      "  \"sliding_window\": 32768,\n",
      "  \"tie_word_embeddings\": true,\n",
      "  \"torch_dtype\": \"bfloat16\",\n",
      "  \"transformers_version\": \"4.42.4\",\n",
      "  \"use_cache\": true,\n",
      "  \"use_sliding_window\": false,\n",
      "  \"vocab_size\": 151936\n",
      "}\n",
      "\n",
      "[INFO|modeling_utils.py:3553] 2024-09-09 14:23:05,227 >> loading weights file /mnt/workspace/qwen2-1.5B/model.safetensors\n",
      "[INFO|modeling_utils.py:1531] 2024-09-09 14:23:05,235 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
      "[INFO|configuration_utils.py:1000] 2024-09-09 14:23:05,237 >> Generate config GenerationConfig {\n",
      "  \"bos_token_id\": 151643,\n",
      "  \"eos_token_id\": 151645\n",
      "}\n",
      "\n",
      "[INFO|modeling_utils.py:4364] 2024-09-09 14:23:07,787 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
      "\n",
      "[INFO|modeling_utils.py:4372] 2024-09-09 14:23:07,788 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /mnt/workspace/qwen2-1.5B.\n",
      "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
      "[INFO|configuration_utils.py:953] 2024-09-09 14:23:07,790 >> loading configuration file /mnt/workspace/qwen2-1.5B/generation_config.json\n",
      "[INFO|configuration_utils.py:1000] 2024-09-09 14:23:07,790 >> Generate config GenerationConfig {\n",
      "  \"bos_token_id\": 151643,\n",
      "  \"do_sample\": true,\n",
      "  \"eos_token_id\": [\n",
      "    151645,\n",
      "    151643\n",
      "  ],\n",
      "  \"pad_token_id\": 151643,\n",
      "  \"repetition_penalty\": 1.1,\n",
      "  \"temperature\": 0.7,\n",
      "  \"top_k\": 20,\n",
      "  \"top_p\": 0.8\n",
      "}\n",
      "\n",
      "09/09/2024 14:23:07 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.\n",
      "09/09/2024 14:23:07 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
      "09/09/2024 14:23:07 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.\n",
      "09/09/2024 14:23:07 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA\n",
      "09/09/2024 14:23:07 - INFO - llamafactory.model.model_utils.misc - Found linear modules: k_proj,gate_proj,up_proj,o_proj,down_proj,v_proj,q_proj\n",
      "09/09/2024 14:23:08 - INFO - llamafactory.model.loader - trainable params: 9,232,384 || all params: 1,552,946,688 || trainable%: 0.5945\n",
      "Detected kernel version 4.19.91, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n",
      "[INFO|trainer.py:642] 2024-09-09 14:23:08,271 >> Using auto half precision backend\n",
      "09/09/2024 14:23:08 - WARNING - llamafactory.train.callbacks - Previous trainer log in this folder will be deleted.\n",
      "[INFO|trainer.py:2128] 2024-09-09 14:23:08,577 >> ***** Running training *****\n",
      "[INFO|trainer.py:2129] 2024-09-09 14:23:08,577 >>   Num examples = 81\n",
      "[INFO|trainer.py:2130] 2024-09-09 14:23:08,577 >>   Num Epochs = 6\n",
      "[INFO|trainer.py:2131] 2024-09-09 14:23:08,577 >>   Instantaneous batch size per device = 1\n",
      "[INFO|trainer.py:2134] 2024-09-09 14:23:08,577 >>   Total train batch size (w. parallel, distributed & accumulation) = 8\n",
      "[INFO|trainer.py:2135] 2024-09-09 14:23:08,577 >>   Gradient Accumulation steps = 8\n",
      "[INFO|trainer.py:2136] 2024-09-09 14:23:08,577 >>   Total optimization steps = 60\n",
      "[INFO|trainer.py:2137] 2024-09-09 14:23:08,580 >>   Number of trainable parameters = 9,232,384\n",
      "{'loss': 3.1246, 'grad_norm': 1.9314517974853516, 'learning_rate': 9.865224352899119e-05, 'epoch': 0.99}\n",
      "{'loss': 1.8508, 'grad_norm': 1.8661845922470093, 'learning_rate': 8.43120818934367e-05, 'epoch': 1.98}\n",
      "{'loss': 1.1225, 'grad_norm': 1.7874681949615479, 'learning_rate': 5.868240888334653e-05, 'epoch': 2.96}\n",
      "{'loss': 0.8865, 'grad_norm': 1.7974295616149902, 'learning_rate': 3.019601169804216e-05, 'epoch': 3.95}\n",
      "{'loss': 0.806, 'grad_norm': 2.012923240661621, 'learning_rate': 8.225609429353187e-06, 'epoch': 4.94}\n",
      "{'loss': 0.7754, 'grad_norm': 1.6349302530288696, 'learning_rate': 0.0, 'epoch': 5.93}\n",
      "100%|███████████████████████████████████████████| 60/60 [01:37<00:00,  1.60s/it][INFO|trainer.py:3478] 2024-09-09 14:24:45,621 >> Saving model checkpoint to saves/Qwen2-1.5B/sft/checkpoint-60\n",
      "/usr/local/lib/python3.10/site-packages/peft/utils/save_and_load.py:195: UserWarning: Could not find a config file in /mnt/workspace/qwen2-1.5B - will assume that the vocabulary was not modified.\n",
      "  warnings.warn(\n",
      "[INFO|tokenization_utils_base.py:2574] 2024-09-09 14:24:45,704 >> tokenizer config file saved in saves/Qwen2-1.5B/sft/checkpoint-60/tokenizer_config.json\n",
      "[INFO|tokenization_utils_base.py:2583] 2024-09-09 14:24:45,704 >> Special tokens file saved in saves/Qwen2-1.5B/sft/checkpoint-60/special_tokens_map.json\n",
      "[INFO|trainer.py:2383] 2024-09-09 14:24:45,935 >> \n",
      "\n",
      "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
      "\n",
      "\n",
      "{'train_runtime': 97.3547, 'train_samples_per_second': 4.992, 'train_steps_per_second': 0.616, 'train_loss': 1.4276446580886841, 'epoch': 5.93}\n",
      "100%|███████████████████████████████████████████| 60/60 [01:37<00:00,  1.62s/it]\n",
      "[INFO|trainer.py:3478] 2024-09-09 14:24:45,937 >> Saving model checkpoint to saves/Qwen2-1.5B/sft\n",
      "/usr/local/lib/python3.10/site-packages/peft/utils/save_and_load.py:195: UserWarning: Could not find a config file in /mnt/workspace/qwen2-1.5B - will assume that the vocabulary was not modified.\n",
      "  warnings.warn(\n",
      "[INFO|tokenization_utils_base.py:2574] 2024-09-09 14:24:45,995 >> tokenizer config file saved in saves/Qwen2-1.5B/sft/tokenizer_config.json\n",
      "[INFO|tokenization_utils_base.py:2583] 2024-09-09 14:24:45,995 >> Special tokens file saved in saves/Qwen2-1.5B/sft/special_tokens_map.json\n",
      "***** train metrics *****\n",
      "  epoch                    =     5.9259\n",
      "  total_flos               =   206463GF\n",
      "  train_loss               =     1.4276\n",
      "  train_runtime            = 0:01:37.35\n",
      "  train_samples_per_second =      4.992\n",
      "  train_steps_per_second   =      0.616\n",
      "Figure saved at: saves/Qwen2-1.5B/sft/training_loss.png\n",
      "09/09/2024 14:24:46 - WARNING - llamafactory.extras.ploting - No metric eval_loss to plot.\n",
      "09/09/2024 14:24:46 - WARNING - llamafactory.extras.ploting - No metric eval_accuracy to plot.\n",
      "[INFO|trainer.py:3788] 2024-09-09 14:24:46,236 >> \n",
      "***** Running Evaluation *****\n",
      "[INFO|trainer.py:3790] 2024-09-09 14:24:46,236 >>   Num examples = 10\n",
      "[INFO|trainer.py:3793] 2024-09-09 14:24:46,236 >>   Batch size = 1\n",
      "100%|███████████████████████████████████████████| 10/10 [00:00<00:00, 23.32it/s]\n",
      "***** eval metrics *****\n",
      "  epoch                   =     5.9259\n",
      "  eval_loss               =     1.0135\n",
      "  eval_runtime            = 0:00:00.48\n",
      "  eval_samples_per_second =     20.622\n",
      "  eval_steps_per_second   =     20.622\n",
      "[INFO|modelcard.py:449] 2024-09-09 14:24:46,723 >> Dropping the following result as it does not have all the necessary fields:\n",
      "{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}\n"
     ]
    }
   ],
   "source": [
    "!llamafactory-cli train examples/train_lora/qwen2_lora_sft.yaml"
   ]
  },
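  {
   "cell_type": "markdown",
   "id": "added-loss-curve-note",
   "metadata": {},
   "source": [
    "The trainer reports `Figure saved at: saves/Qwen2-1.5B/sft/training_loss.png` in the log above; the next cell simply displays that figure inline."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "added-loss-curve-show",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the loss curve saved by LLaMA-Factory (path taken from the training log above).\n",
    "from IPython.display import Image\n",
    "\n",
    "Image(filename=\"saves/Qwen2-1.5B/sft/training_loss.png\")"
   ]
  },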
  {
   "cell_type": "markdown",
   "id": "f102c68e-7f68-45e0-8e91-7982e4df6b72",
   "metadata": {},
   "source": [
    "# 3.动态验证\n",
    "notebook搭建gradio界面有bug，因此可以下载模型文件到本地测试，或者跳过此步骤直接合并"
   ]
  },
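  {
   "cell_type": "markdown",
   "id": "added-validate-note",
   "metadata": {},
   "source": [
    "A minimal sketch for checking the adapter without the Gradio UI, assuming `transformers` and `peft` are available (both are LLaMA-Factory dependencies) and that the base model ships a chat template. Paths follow the training log above; adjust them if your files live elsewhere."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "added-validate-adapter",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hedged sketch: load the base model plus the LoRA adapter saved by the training step\n",
    "# and ask the identity question. Paths come from the log above.\n",
    "import torch\n",
    "from peft import PeftModel\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
    "\n",
    "base_path = \"/mnt/workspace/qwen2-1.5B\"  # base model used for training\n",
    "adapter_path = \"saves/Qwen2-1.5B/sft\"    # LoRA adapter saved above\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(base_path)\n",
    "model = AutoModelForCausalLM.from_pretrained(\n",
    "    base_path, torch_dtype=torch.bfloat16, device_map=\"auto\"\n",
    ")\n",
    "model = PeftModel.from_pretrained(model, adapter_path)\n",
    "\n",
    "messages = [{\"role\": \"user\", \"content\": \"你是谁？\"}]\n",
    "input_ids = tokenizer.apply_chat_template(\n",
    "    messages, add_generation_prompt=True, return_tensors=\"pt\"\n",
    ").to(model.device)\n",
    "output_ids = model.generate(input_ids, max_new_tokens=64)\n",
    "print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))"
   ]
  },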
  {
   "cell_type": "markdown",
   "id": "225e4fce-b0f1-42e1-823a-dcb45268189f",
   "metadata": {},
   "source": [
    "# 4.合并权重文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "531e5b53-3e5a-4e8e-b39a-f778219b6edf",
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2024-09-09T06:30:08.628539Z",
     "iopub.status.busy": "2024-09-09T06:30:08.628232Z",
     "iopub.status.idle": "2024-09-09T06:30:26.491863Z",
     "shell.execute_reply": "2024-09-09T06:30:26.491290Z",
     "shell.execute_reply.started": "2024-09-09T06:30:08.628520Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.10/site-packages/_distutils_hack/__init__.py:55: UserWarning: Reliance on distutils from stdlib is deprecated. Users must rely on setuptools to provide the distutils module. Avoid importing distutils or import setuptools first, and avoid setting SETUPTOOLS_USE_DISTUTILS=stdlib. Register concerns at https://github.com/pypa/setuptools/issues/new?template=distutils-deprecation.yml\n",
      "  warnings.warn(\n",
      "[2024-09-09 14:30:13,889] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n",
      "\u001b[93m [WARNING] \u001b[0m Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH\n",
      "\u001b[93m [WARNING] \u001b[0m sparse_attn requires a torch version >= 1.5 and < 2.0 but detected 2.3\n",
      "\u001b[93m [WARNING] \u001b[0m using untested triton version (2.3.1), only 1.0.0 is known to be compatible\n",
      "[INFO|configuration_utils.py:731] 2024-09-09 14:30:16,572 >> loading configuration file /mnt/workspace/qwen2-1.5B/config.json\n",
      "[INFO|configuration_utils.py:800] 2024-09-09 14:30:16,573 >> Model config Qwen2Config {\n",
      "  \"_name_or_path\": \"/mnt/workspace/qwen2-1.5B\",\n",
      "  \"architectures\": [\n",
      "    \"Qwen2ForCausalLM\"\n",
      "  ],\n",
      "  \"attention_dropout\": 0.0,\n",
      "  \"bos_token_id\": 151643,\n",
      "  \"eos_token_id\": 151645,\n",
      "  \"hidden_act\": \"silu\",\n",
      "  \"hidden_size\": 1536,\n",
      "  \"initializer_range\": 0.02,\n",
      "  \"intermediate_size\": 8960,\n",
      "  \"max_position_embeddings\": 32768,\n",
      "  \"max_window_layers\": 28,\n",
      "  \"model_type\": \"qwen2\",\n",
      "  \"num_attention_heads\": 12,\n",
      "  \"num_hidden_layers\": 28,\n",
      "  \"num_key_value_heads\": 2,\n",
      "  \"rms_norm_eps\": 1e-06,\n",
      "  \"rope_theta\": 1000000.0,\n",
      "  \"sliding_window\": 32768,\n",
      "  \"tie_word_embeddings\": true,\n",
      "  \"torch_dtype\": \"bfloat16\",\n",
      "  \"transformers_version\": \"4.42.4\",\n",
      "  \"use_cache\": true,\n",
      "  \"use_sliding_window\": false,\n",
      "  \"vocab_size\": 151936\n",
      "}\n",
      "\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:30:16,573 >> loading file vocab.json\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:30:16,573 >> loading file merges.txt\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:30:16,574 >> loading file tokenizer.json\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:30:16,574 >> loading file added_tokens.json\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:30:16,574 >> loading file special_tokens_map.json\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:30:16,574 >> loading file tokenizer_config.json\n",
      "[WARNING|logging.py:313] 2024-09-09 14:30:16,741 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
      "[INFO|configuration_utils.py:731] 2024-09-09 14:30:16,742 >> loading configuration file /mnt/workspace/qwen2-1.5B/config.json\n",
      "[INFO|configuration_utils.py:800] 2024-09-09 14:30:16,742 >> Model config Qwen2Config {\n",
      "  \"_name_or_path\": \"/mnt/workspace/qwen2-1.5B\",\n",
      "  \"architectures\": [\n",
      "    \"Qwen2ForCausalLM\"\n",
      "  ],\n",
      "  \"attention_dropout\": 0.0,\n",
      "  \"bos_token_id\": 151643,\n",
      "  \"eos_token_id\": 151645,\n",
      "  \"hidden_act\": \"silu\",\n",
      "  \"hidden_size\": 1536,\n",
      "  \"initializer_range\": 0.02,\n",
      "  \"intermediate_size\": 8960,\n",
      "  \"max_position_embeddings\": 32768,\n",
      "  \"max_window_layers\": 28,\n",
      "  \"model_type\": \"qwen2\",\n",
      "  \"num_attention_heads\": 12,\n",
      "  \"num_hidden_layers\": 28,\n",
      "  \"num_key_value_heads\": 2,\n",
      "  \"rms_norm_eps\": 1e-06,\n",
      "  \"rope_theta\": 1000000.0,\n",
      "  \"sliding_window\": 32768,\n",
      "  \"tie_word_embeddings\": true,\n",
      "  \"torch_dtype\": \"bfloat16\",\n",
      "  \"transformers_version\": \"4.42.4\",\n",
      "  \"use_cache\": true,\n",
      "  \"use_sliding_window\": false,\n",
      "  \"vocab_size\": 151936\n",
      "}\n",
      "\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:30:16,743 >> loading file vocab.json\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:30:16,743 >> loading file merges.txt\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:30:16,743 >> loading file tokenizer.json\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:30:16,743 >> loading file added_tokens.json\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:30:16,743 >> loading file special_tokens_map.json\n",
      "[INFO|tokenization_utils_base.py:2159] 2024-09-09 14:30:16,743 >> loading file tokenizer_config.json\n",
      "[WARNING|logging.py:313] 2024-09-09 14:30:16,909 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
      "09/09/2024 14:30:16 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n",
      "[INFO|configuration_utils.py:731] 2024-09-09 14:30:16,927 >> loading configuration file /mnt/workspace/qwen2-1.5B/config.json\n",
      "[INFO|configuration_utils.py:800] 2024-09-09 14:30:16,928 >> Model config Qwen2Config {\n",
      "  \"_name_or_path\": \"/mnt/workspace/qwen2-1.5B\",\n",
      "  \"architectures\": [\n",
      "    \"Qwen2ForCausalLM\"\n",
      "  ],\n",
      "  \"attention_dropout\": 0.0,\n",
      "  \"bos_token_id\": 151643,\n",
      "  \"eos_token_id\": 151645,\n",
      "  \"hidden_act\": \"silu\",\n",
      "  \"hidden_size\": 1536,\n",
      "  \"initializer_range\": 0.02,\n",
      "  \"intermediate_size\": 8960,\n",
      "  \"max_position_embeddings\": 32768,\n",
      "  \"max_window_layers\": 28,\n",
      "  \"model_type\": \"qwen2\",\n",
      "  \"num_attention_heads\": 12,\n",
      "  \"num_hidden_layers\": 28,\n",
      "  \"num_key_value_heads\": 2,\n",
      "  \"rms_norm_eps\": 1e-06,\n",
      "  \"rope_theta\": 1000000.0,\n",
      "  \"sliding_window\": 32768,\n",
      "  \"tie_word_embeddings\": true,\n",
      "  \"torch_dtype\": \"bfloat16\",\n",
      "  \"transformers_version\": \"4.42.4\",\n",
      "  \"use_cache\": true,\n",
      "  \"use_sliding_window\": false,\n",
      "  \"vocab_size\": 151936\n",
      "}\n",
      "\n",
      "09/09/2024 14:30:16 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n",
      "[INFO|modeling_utils.py:3553] 2024-09-09 14:30:16,944 >> loading weights file /mnt/workspace/qwen2-1.5B/model.safetensors\n",
      "[INFO|modeling_utils.py:1531] 2024-09-09 14:30:16,952 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
      "[INFO|configuration_utils.py:1000] 2024-09-09 14:30:16,954 >> Generate config GenerationConfig {\n",
      "  \"bos_token_id\": 151643,\n",
      "  \"eos_token_id\": 151645\n",
      "}\n",
      "\n",
      "[INFO|modeling_utils.py:4364] 2024-09-09 14:30:17,587 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
      "\n",
      "[INFO|modeling_utils.py:4372] 2024-09-09 14:30:17,587 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /mnt/workspace/qwen2-1.5B.\n",
      "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
      "[INFO|configuration_utils.py:953] 2024-09-09 14:30:17,590 >> loading configuration file /mnt/workspace/qwen2-1.5B/generation_config.json\n",
      "[INFO|configuration_utils.py:1000] 2024-09-09 14:30:17,590 >> Generate config GenerationConfig {\n",
      "  \"bos_token_id\": 151643,\n",
      "  \"do_sample\": true,\n",
      "  \"eos_token_id\": [\n",
      "    151645,\n",
      "    151643\n",
      "  ],\n",
      "  \"pad_token_id\": 151643,\n",
      "  \"repetition_penalty\": 1.1,\n",
      "  \"temperature\": 0.7,\n",
      "  \"top_k\": 20,\n",
      "  \"top_p\": 0.8\n",
      "}\n",
      "\n",
      "09/09/2024 14:30:17 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n",
      "09/09/2024 14:30:21 - INFO - llamafactory.model.adapter - Merged 1 adapter(s).\n",
      "09/09/2024 14:30:21 - INFO - llamafactory.model.adapter - Loaded adapter(s): ./saves/Qwen2-1.5B/sft\n",
      "09/09/2024 14:30:21 - INFO - llamafactory.model.loader - all params: 1,543,714,304\n",
      "09/09/2024 14:30:21 - INFO - llamafactory.train.tuner - Convert model dtype to: torch.bfloat16.\n",
      "[INFO|configuration_utils.py:472] 2024-09-09 14:30:21,721 >> Configuration saved in ./saves/Qwen2-1.5B/output/config.json\n",
      "[INFO|configuration_utils.py:769] 2024-09-09 14:30:21,721 >> Configuration saved in ./saves/Qwen2-1.5B/output/generation_config.json\n",
      "[INFO|modeling_utils.py:2690] 2024-09-09 14:30:24,606 >> Model weights saved in ./saves/Qwen2-1.5B/output/model.safetensors\n",
      "[INFO|tokenization_utils_base.py:2574] 2024-09-09 14:30:24,606 >> tokenizer config file saved in ./saves/Qwen2-1.5B/output/tokenizer_config.json\n",
      "[INFO|tokenization_utils_base.py:2583] 2024-09-09 14:30:24,607 >> Special tokens file saved in ./saves/Qwen2-1.5B/output/special_tokens_map.json\n"
     ]
    }
   ],
   "source": [
    "!llamafactory-cli export --model_name_or_path /mnt/workspace/qwen2-1.5B --adapter_name_or_path ./saves/Qwen2-1.5B/sft --template qwen --finetuning_type lora --export_dir ./saves/Qwen2-1.5B/output --export_size 4"
   ]
  }
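  ,
  {
   "cell_type": "markdown",
   "id": "added-merged-check-note",
   "metadata": {},
   "source": [
    "As a quick sanity check (a sketch, not part of the original run): the merged export in `./saves/Qwen2-1.5B/output` should now load as a standalone model, without the LoRA adapter, and still answer with the fine-tuned identity."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "added-merged-check",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hedged sketch: load the merged export (path from the export command above) as a\n",
    "# standalone model and confirm it answers with the fine-tuned identity.\n",
    "import torch\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
    "\n",
    "merged_path = \"./saves/Qwen2-1.5B/output\"\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(merged_path)\n",
    "model = AutoModelForCausalLM.from_pretrained(\n",
    "    merged_path, torch_dtype=torch.bfloat16, device_map=\"auto\"\n",
    ")\n",
    "\n",
    "messages = [{\"role\": \"user\", \"content\": \"你是谁？\"}]\n",
    "input_ids = tokenizer.apply_chat_template(\n",
    "    messages, add_generation_prompt=True, return_tensors=\"pt\"\n",
    ").to(model.device)\n",
    "output_ids = model.generate(input_ids, max_new_tokens=64)\n",
    "print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))"
   ]
  }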
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
